;;   This file is part of scheme-GNUnet, a partial Scheme port of GNUnet.
;;   Copyright (C) 2021 Maxime Devos <maximedevos@telenet.be>
;;
;;   scheme-GNUnet is free software: you can redistribute it and/or modify it
;;   under the terms of the GNU Affero General Public License as published
;;   by the Free Software Foundation, either version 3 of the License,
;;   or (at your option) any later version.
;;
;;   scheme-GNUnet is distributed in the hope that it will be useful, but
;;   WITHOUT ANY WARRANTY; without even the implied warranty of
;;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;   Affero General Public License for more details.
;;
;;   You should have received a copy of the GNU Affero General Public License
;;   along with this program.  If not, see <http://www.gnu.org/licenses/>.
;;
;;   SPDX-License-Identifier: AGPL-3.0-or-later
(import (tests utils)
	(quickcheck)
	(quickcheck property)
	(quickcheck arbitrary)
	(quickcheck generator)
	(gnu gnunet utils tokeniser)
	(gnu gnunet utils bv-slice)
	(srfi srfi-1)
	(srfi srfi-8)
	(srfi srfi-43)
	(only (ice-9 control) let/ec)
	(ice-9 match)
	(only (system foreign)
	      pointer->bytevector bytevector->pointer)
	(only (rnrs base) assert)
	(only (rnrs exceptions) guard)
	(only (rnrs conditions)
	      assertion-violation? condition-who)
	(only (rnrs io ports)
	      open-bytevector-input-port)
	(rnrs bytevectors)
        (gnu gnunet netstruct syntactic)
	(gnu gnunet util struct))

(define (fluffed-bytevector %size %off fluff)
  ;; Returned bytevector is a complete message.
  (let* ((size (+ %size (sizeof /:message-header '())))
	 (bv (make-bytevector (+ %off size)))
	 (s (bv-slice/read-write bv)))
    (bytevector-copy! fluff 0 bv 0
		      (min (bytevector-length fluff)
			   (bytevector-length bv)))
    (set%! /:message-header '(size)
	   (slice-slice s %off (sizeof /:message-header '()))
	   size)
    (values bv %off size)))

(test-begin "tokeniser")

(define (no-return/overly-small . _)
  (error "unexpected call to return/overly-small"))

(define (no-return/done . _)
  (error "unexpected call to return/done"))

(define (no-return/done-eof . _)
  (error "unexpected call to return/done-eof"))

(define (no-return/premature-eof . _)
  (error "unexpected call to return/premature-eof"))

(define (no-handle/message . _)
  (error "unexpected call to handle/message"))

;; Some bugs this found:
;;  * in some places, the 'offset' argument was ignored
;;    and always the first or first two bytes of 'bv'
;;    in 'continue' in 'add-bytevector!' would be used.
;;  * some incorrect assertions in the tokeniser code
;;  * when a message was fragmented (between header and data),
;;    the data was not copied
;;  * the type of a message was calculated incorrectly
;;    whe ‘overly small message errors’ are reported
;;  * the type of a message could not be calculated
;;    for some fragmented messages, if the first 'length'
;;    was 1 and the second 'length' was 3.

(test-assert "[prop] complete messages are passed through"
  (quickcheck
   (property
     ((%size $natural)
      (%off  $natural)
      (fluff $bytevector))
     (receive (bv offset size)
	 (fluffed-bytevector %size %off fluff)
       (let ((handled? #f))
	 (add-bytevector!
	  (make-tokeniser)
	  bv offset size
	  (lambda (bv2 offset2 length)
	    (assert (not handled?))
	    (assert (eq? bv bv2))
	    (assert (= offset offset2))
	    (assert (= length size))
	    (set! handled? #t))
	  (lambda _ handled?)
	  no-return/overly-small))))))

;; Test fragmented messages and multiple messages
;; are properly handled.

(define choose-message
  (generator-let*
   ((length (choose-one/weighted
	     ;; Very small
	     `((1 . ,(choose-integer 4 5))
	       (1 . ,(choose-integer 5 6))
	       ;; Some length
	       (2 . ,(choose-integer 4 9)))))
    ;; Arbitrary 'type' field and data
    (filler (choose-bytevector (- length 2))))
   (let ((bv (make-bytevector length)))
     (bytevector-u16-set! bv 0 length (endianness big))
     (bytevector-copy! filler 0 bv 2 (bytevector-length filler))
     (generator-return bv))))

;; Generate a list of message bytevectors
(define choose-many-messages
  (sized-generator
   (cut choose-list choose-message <>)))

(define (merge-bytevectors messages)
  (define size (reduce + 0 (map bytevector-length messages)))
  (define bv (make-bytevector size))
  (let loop ((offset 0) (messages messages))
    (if (null? messages)
	bv
	(let* ((head (car messages))
	       (tail (cdr messages))
	       (message-size (bytevector-length head)))
	  (bytevector-copy! head 0 bv offset message-size)
	  (loop (+ offset message-size) tail)))))

;; Try to occassionally split message in annoying places,
;; and avoid splitting at message boundaries.
(define (choose-split-positions messages)
  (let loop ((offset 0) (messages messages))
    (if (null? messages)
	(generator-return '())
	(let* ((head (car messages))
	       (tail (cdr messages))
	       (message-size (bytevector-length head))
	       (data-splittable? (> message-size 5)))
	  (generator-let*
	   ((rest-positions
	     (loop (+ offset message-size) tail))
	    (data-split-positions
	     (if data-splittable?
		 (generator-lift
		  list
		  (choose-integer 4 message-size))
		 (generator-return '())))
	    (end-split-positions
	     (choose-one/weighted
	      `((2 . ,(generator-return '()))
		(1 . ,(generator-return (list message-size))))))
	    (head-split-positions
	     (choose-one/weighted
	      `((3 . ,(generator-return '()))  ; don't split header
		(2 . ,(generator-return '(1))) ; split inside size field
		(2 . ,(generator-return '(2))) ; split between size field and type
		(1 . ,(generator-return '(1 2))))))) ; both of above
	   (let* ((all-positions
		   (append head-split-positions data-split-positions
			   end-split-positions))
		  (fixed-positions
		   (map (cut + <> offset) all-positions)))
	     (generator-return
	      (append fixed-positions rest-positions))))))))

;; A list of (start . length).
;; Starts at the minimal 'start', and ends at 'end' (exclusive)
(define* (positions->ranges positions end)
  (match positions
    (() `((,end . 0)))
    ((start) `((,start . ,(- end start))))
    ((start next . rest)
     `((,start . ,(- next start))
       ,@(positions->ranges `(,next ,@rest) end)))))

(define $messages-and-ranges
  (arbitrary
   (gen (generator-let*
	 ((messages choose-many-messages)
	  (bv (generator-return
	       (merge-bytevectors messages)))
	  (split-positions
	   (choose-split-positions messages))
	  (ranges
	   (generator-return
	    (positions->ranges (cons 0 split-positions)
			       (bytevector-length bv)))))
	 (generator-return
	  `#(,messages ,bv ,ranges))))
   (xform #f))) ; unneeded

;; A simplified test failure case of
;; "[prop] all fragmented & multiple messages received".
;; The issue was that (1 2 3 4) was not copied.
(test-equal "message fragmented on header/data boundary reassembled"
  #vu8(0 8 50 50 1 2 3 4)
  (let ((tok (make-tokeniser))
	;; Message size: 8
	(received? #f)
	(bv #vu8(0 8 50 50 1 2 3 4)))
    (add-bytevector! tok bv 0 4
		     no-handle/message (const #t) no-return/overly-small)
    (add-bytevector! tok bv 4 4
		     (lambda (bv offset length)
		       ;; These two assertions are actually an implementation
		       ;; detail, and test no overly large allocations are
		       ;; made.
		       (assert (= 0 offset))
		       (assert (= length (bytevector-length bv)))
		       (assert (not received?))
		       (set! received? (bytevector-copy bv)))
		     (const #t) no-return/overly-small)
    received?))

;; Found when debugging a test failure of
;; "[prop] all fragmented & multiple messages received".
;; The bug was a missing set-position! call.
(test-equal "message fragmented in size field and after message header, some data"
  #vu8(0 6 236 197 216 19)
  (let ((tok (make-tokeniser))
	(received? #f)
	(bv #vu8(0 6 236 197 216 19)))
    ;; copy the zero
    (add-bytevector! tok bv 0 1
		     no-handle/message (const #t) no-return/overly-small)
    ;; copy the rest of the message header
    (add-bytevector! tok bv 1 3
		     no-handle/message (const #t) no-return/overly-small)
    ;; copy the data
    (add-bytevector! tok bv 4 2
		     (lambda (bv offset length)
		       ;; see previous test case
		       (assert (= 0 offset))
		       (assert (= length (bytevector-length bv)))
		       (assert (not received?))
		       (set! received? (bytevector-copy bv)))
		     (const #t)
		     no-return/overly-small)
    received?))

;; And return/done is called in tail position.
(test-assert "[prop] all fragmented & multiple messages received"
  (quickcheck
   (property
     ((messages-and-ranges $messages-and-ranges))
     (match messages-and-ranges
       (#(messages bv ranges)
	(assert (= (apply + (map cdr ranges))
		   (bytevector-length bv)))
	(guard (e ((assertion-violation? e)
		   ;; 2: don't include 'make-stack' or
		   ;; this guard
		   (display-backtrace (make-stack #t 2) (current-error-port))
		   (print-exception (current-error-port) #f '%exception (list e))
		   #f))
	  (let ((tok (make-tokeniser))
		(remove-message!
		 (lambda (bv offset length)
		   (define bv/range
		     (pointer->bytevector
		      (bytevector->pointer bv offset)
		      length))
		   ;; Sanity check
		   (assert (<= 0 offset))
		   (assert (<= (+ offset length) (bytevector-length bv)))
		   (let/ec ec
		     (pair-for-each
		      (match-lambda
			(((and message (set! set-message!)) . rest)
			 (when (and (bytevector? message)
				    (bytevector=? message bv/range))
			   (set-message! #f) ; mark it as received
			   (ec))))
		      messages) ; stop searching
		     (assert (and #f
				  "message not added but still received"))))))
	    (for-each
	     (match-lambda
	       ((start . length)
		(assert
		 (calls-in-tail-position?
		  (lambda (return/done)
		    (add-bytevector! tok bv start length
				     remove-message!
				     (lambda () (return/done))
				     no-return/overly-small))))))
	     ranges)))
	;; All messages should have been received.
	(not (any identity messages)))))))

;; The type was read at an incorrect offset.
(test-equal "overly small message error (complete header)"
  (map (lambda (n)
	 `(#t ; in tail position
	   ,(+ (* 256 n) (+ n 1)) ; message type
	   ,n)) ; message size
       (iota 4))
  (map (lambda (n)
	 (call-with-values
	     (lambda ()
	       (calls-in-tail-position?
		(lambda (return/overly-small)
		  (add-bytevector! (make-tokeniser)
				   (u8-list->bytevector
				    ;; n (+ n 1): arbitrary message type.
				    ;; Two separate values are used for
				    ;; the two halves of the u16, to
				    ;; detect little / big endianness issues.
				    ;;
				    ;; GNUnet usually (always?) uses
				    ;; big-endian.
				    (list 0 n n (+ n 1)))
				   0 4
				   no-handle/message
				   no-return/done
				   return/overly-small))))
	   list))
       ;; 4: size of message header
       ;; iota makes a list '(0 1 2 3)
       (iota 4)))

;; A bounds check at the call to return/overly-small
;; was overly strict, resulting in the message type being missing.
(test-equal "overly small message error (header split in size field)"
  (map (lambda (n)
	 `(#t ; in tail position
	   ,(+ (* 256 (+ n 1)) n) ; message type
	   ,n))
       (iota 4))
  (map (lambda (n)
	 (let ((tok (make-tokeniser))
	       (bv (u8-list->bytevector
		    ;; see previous test case for why (+ n 1) n
		    (list 0 n (+ n 1) n))))
	   (add-bytevector! tok bv 0 1
			    no-handle/message
			    (const #t)
			    no-return/overly-small)
	   (call-with-values
	       (lambda ()
		 (calls-in-tail-position?
		  (lambda (return/overly-small)
		    (add-bytevector! tok bv 1 3
				     no-handle/message
				     no-return/done
				     return/overly-small))))
	     list)))
       (iota 4))) ; see previous test case for why (iota 4)

;; All the previous tests use 'small' messages. That is,
;; the message sizes were always < 256.  However, messages
;; with size >= 256 definitely exist.
;;
;; This test detects the mutation
;;   (bytevector-u8-ref bv offset) --> 0
;; in (! size/byte-0 [...]).

(define huge-bv
  (let ((bv (make-bytevector #xfffe 17)))
    (bytevector-u16-set! bv 0 #xfffe (endianness big))
    bv))

;; Tests:
;;   * the whole message is received
;;   * return/done is called in tail position
(test-equal "huge message, split early"
  (map (const #t) (iota 16))
  (map (lambda (split-position)
	 (let ((tok (make-tokeniser))
	       (received? #f))
	   (receive (in-tail-position?)
	       (calls-in-tail-position?
		(lambda (return/done)
		  (add-bytevector! tok huge-bv 0 split-position
				   no-handle/message
				   return/done
				   no-return/overly-small)))
	     (assert in-tail-position?))
	   (receive (in-tail-position?)
	       (calls-in-tail-position?
		(lambda (return/done)
		  (add-bytevector! tok huge-bv split-position
				   (- #xfffe split-position)
				   (lambda (bv offset length)
				     (assert (not received?))
				     ;; really an implementation detail,
				     ;; but no bytevector-range-copy
				     ;; exists.
				     (assert (= 0 offset))
				     (assert (= length (bytevector-length bv)))
				     (set! received?
					   (bytevector-copy bv)))
				   return/done
				   no-return/overly-small)))
	     (assert in-tail-position?))
	   (equal? huge-bv received?)))
       (iota 16)))

(define (catch-errors thunk)
  (guard (e ((interrupted-tokeniser-violation? e)
	     `(,(condition-who e) . interrupted))
	    ((kaput-tokeniser-error? e)
	     `(,(condition-who e) . kaput)))
    (thunk)))

(test-equal "re-entrancy from message handler is detected (complete message)"
  '(add-bytevector! . interrupted)
  (let ((tok (make-tokeniser)))
    (catch-errors
     (lambda ()
       (add-bytevector! tok #vu8(0 4 0 0) 0 4
			(lambda (bv offset length)
			  (add-bytevector! tok #vu8(0 4 1 1) 0 4
					   no-handle/message
					   no-return/done
					   no-return/overly-small)
			  (assert #f))
			no-return/done
			no-return/overly-small)))))

(test-equal "tokeniser becomes kaput, split after size field"
  '(add-bytevector! . kaput)
  (let ((tok (make-tokeniser))
	(bv #vu8(0 3)))
    (receive (tail? type size)
	(calls-in-tail-position?
	 (lambda (return/overly-small)
	   (add-bytevector! tok bv 0 2 no-handle/message
			    no-return/done
			    return/overly-small)))
      (assert (eq? #f type))
      (assert (= size 3))
      (assert tail?))
    (catch-errors
     (lambda ()
       (add-bytevector! tok #vu8(0) 0 1
			no-handle/message no-return/done no-return/overly-small)
       (error "unreachable")))))

(test-equal "tokeniser becomes kaput, split inside size field"
  '(add-bytevector! . kaput)
  (let ((tok (make-tokeniser))
	(bv #vu8(0 3 4 5)))
    (receive (tail?)
	(calls-in-tail-position?
	 (lambda (return/done)
	   (add-bytevector! tok bv 0 1 no-handle/message
			    return/done
			    no-return/overly-small)))
      (assert tail?))
    (receive (tail? type size)
	(calls-in-tail-position?
	 (lambda (return/overly-small)
	   (add-bytevector! tok bv 1 2 no-handle/message
			    no-return/done
			    return/overly-small)))
      (assert tail?)
      (assert (= size 3))
      (assert (eq? type #f)))
    (catch-errors
     (lambda ()
       (add-bytevector! tok bv 2 2
			no-handle/message no-return/done
			no-return/overly-small)
       (error "unreachable")))))

(test-equal "eof detected"
  '(#t)
  (receive result
      (calls-in-tail-position?
       (lambda (return/done-eof)
	 (add-from-port! (make-tokeniser) (%make-void-port "r")
			 no-handle/message no-return/overly-small
			 return/done-eof no-return/premature-eof)))
    result))

(test-equal "eof detected (complete data)"
  '(#t)
  (receive result
      (calls-in-tail-position?
       (lambda (return/done-eof)
	 (define handled? #f)
	 (define (handle/message bv offset length)
	   (assert (= length 4))
	   ;; Verify the received message is correct
	   (assert (= (bytevector-u32-ref bv offset (endianness big))
		      (bytevector-u32-ref #vu8(0 4 0 0) 0 (endianness big))))
	   (assert (not handled?))
	   (set! handled? #t))
	 (add-from-port! (make-tokeniser)
			 (open-bytevector-input-port #vu8(0 4 0 0))
			 handle/message no-return/overly-small return/done-eof
			 no-return/done-eof)))
    result))

(test-equal "premature eof detected"
  '(#t)
  (receive result
      (calls-in-tail-position?
       (lambda (return/premature-eof)
	 ;; 4 bytes are expected, but only the stream only has 3.
	 (add-from-port! (make-tokeniser) (open-bytevector-input-port #vu8(0 4 0))
			 no-handle/message no-return/overly-small no-return/done-eof
			 return/premature-eof)))
    result))

(test-equal "add-from-port! and partial messages (split at header)"
  #vu8(0 8 2 3 4 5 6 7)
  (let ((tok (make-tokeniser))
	(message #f))
    (add-bytevector! tok #vu8(0 8 2 3) 0 4 no-handle/message
		     (const #t) no-return/overly-small)
    (add-from-port! tok (open-bytevector-input-port #vu8(4 5 6 7))
		    (lambda (bv offset length)
		      (assert (not message))
		      (let ((bv2 (make-bytevector length)))
			(bytevector-copy! bv offset bv2 0 length)
			(set! message bv2)))
		    no-return/overly-small (lambda () message)
		    no-return/premature-eof)))

(test-equal "kaput tokeniser and add-from-port!"
  '(add-from-port! . kaput)
  (let ((tok (make-tokeniser))
	(bv #vu8(0 3 4 5)))
    ;; Make the tokeniser kaput (overly small message size)
    (add-bytevector! tok bv 0 4 no-handle/message no-return/done
		     (const #t))
    ;; And feed it some bytes (with add-from-port!) anyway.
    (catch-errors
     (lambda ()
       (add-from-port! tok (open-bytevector-input-port #vu8(1 2 3 4))
		       no-handle/message no-return/overly-small
		       no-return/done-eof no-return/premature-eof)
       (error "unreachable")))))

(test-end "tokeniser")
